###############################
#
#define settings
#
###############################
GET_SAMPLE_NAMES <- TRUE # if set to FALSE define it in the next line
#sample_names_user <- c()
unique <- params$unique # how many peptides should be at least necessary to identify a protein
# which types should be evaluated TRUE or FALSE could be used
RAZOR <- TRUE
UNIQUE <- TRUE
LFQ <- FALSE
# define path of the proteinGroups.txt file(s)
path = params$path
Filtering:
Potential.contaminant != "+" -> remove where
contaminants
Reverse != "+"-> remove where reverse =
+
Only.identified.by.site != "+" -> remove where
only identified by site = +
data[name] >= unique -> keep only
identifications which have greater or equal number (defined via variable
unique) of razor/unique peptides | typically
this is set to 1 or 2
Data analysis show the number of identified proteins and
the heatmap. In Summary the final results are visualized as
boxplot and a table with the summarized values is shown.
Filtering:
Potential.contaminant != "+" -> remove where
contaminants = +Reverse != "+"-> remove where reverse = +Only.identified.by.site != "+" -> remove where only
identified by site = +Then the log2of the LFQ intensities is calculated and
all -Inf values were substituted by NA using the following
command: lfq[lfq == -Inf] <- NA
Data analysis show the number of identified proteins and
the heatmap. In Summary the final results are visualized as
boxplot and a table with the summarized values is shown.
# load libraries
library(tidyverse)
library(dplyr)
library(pheatmap)
library(ggplot2)
library(reshape2)
library(knitr)
library(plotly)
# load functions
source("filtering_function.R")
print("loading sucessful")
## [1] "loading sucessful"
The txt-files of the defined folder are read in, processed and filtered automatically. This section shows the code, heatmaps and also some summary statistics.
infile <- read.csv(path, dec=".", sep="\t")
res_raw <- list(infile)
########## get sample names
if (GET_SAMPLE_NAMES == TRUE){
#get sample names
tmp <- colnames(res_raw[[1]])
tmp <- tmp[grep("Razor...unique.peptides.",tmp)]
tmp <- sub(".*Razor...unique.peptides.", "",tmp)
sample_names_raw <- tmp
rm(tmp)
#create sample names
sample_names_razor <- paste("Razor...unique.peptides.",sample_names_raw, sep="")
sample_names_unique <- paste("Unique.peptides.",sample_names_raw, sep="")
sample_names_lfq <- paste("LFQ.intensity.",sample_names_raw, sep="")
} else {
res_raw[[1]] <- res_raw[[1]] %>%
rename_with(~ paste("Razor_", sample_names_user, sep = ""),
starts_with("Razor...unique.peptides.")) %>%
rename_with(~ paste("Unique_", sample_names_user, sep = ""),
starts_with("Unique.peptides.")) %>%
rename_with(~ paste("LFQ_", sample_names_user, sep = ""),
starts_with("LFQ.intensity."))
# get column names
sample_names_razor <- colnames(res_raw[[i]][grep("Razor_",
colnames(res_raw[[i]]))])
sample_names_unique <- colnames(res_raw[[i]][grep("Unique_",
colnames(res_raw[[i]]))])
sample_names_lfq <- colnames(res_raw[[i]][grep("LFQ_",
colnames(res_raw[[i]]))])
}
########## create result data frame & result lists
# create data frame for razor
results_2gether_razor <- data.frame(matrix(ncol = length(sample_names_razor), nrow = 1))
colnames(results_2gether_razor) <- sample_names_razor
#create data frame for unique
results_2gether_unique <- data.frame(matrix(ncol = length(sample_names_unique), nrow = 1))
colnames(results_2gether_unique) <- sample_names_unique
#create data frame for lfq
results_2gether_lfq <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = 1))
colnames(results_2gether_lfq) <- sample_names_lfq
#results lists
results_razor <- list()
results_unique <- list()
results_lfq <- list()
########## perform evaluation
if (RAZOR == TRUE) {
cat("\n")
cat("##","Razor","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
fasta <- data$Fasta.headers
results <- data.frame(matrix(ncol = length(sample_names_razor), nrow = nrow(data)))
colnames(results) <- sample_names_razor
i <- 0
for (i in 1:length(sample_names_razor)){
name <- sample_names_razor[i]
tmp <- (data[name] >= unique)
results[i] <- tmp
cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
}
results_2gether_razor[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
results$FASTA <- data$Fasta.headers
results_razor[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 2
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap_clear) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
legend_breaks = c(0,1),
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
[1] “!NA values detected!” Reverse 1 100
Razor…unique.peptides.E3_D1_a_01
12
Razor…unique.peptides.E3_D1_a_02
9
Razor…unique.peptides.E3_D1_a_03
7
Razor…unique.peptides.E3_D1_b_01
7
Razor…unique.peptides.E3_D1_b_02
10
Razor…unique.peptides.E3_D1_b_03
11
Razor…unique.peptides.E3_D1_c_01
4
Razor…unique.peptides.E3_D1_c_02
6
Razor…unique.peptides.E3_D1_c_03
5
Razor…unique.peptides.E3_D1_d_01
4
Razor…unique.peptides.E3_D1_d_02
7
Razor…unique.peptides.E3_D1_d_03
6
Razor…unique.peptides.E3_D1_e_01
11
Razor…unique.peptides.E3_D1_e_02
11
Razor…unique.peptides.E3_D1_e_03
12
Razor…unique.peptides.E3_D1_f_01
10
Razor…unique.peptides.E3_D1_f_02
10
Razor…unique.peptides.E3_D1_f_03
10
if (UNIQUE == TRUE) {
cat("\n")
cat("##","Unique","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
results <- data.frame(matrix(ncol = length(sample_names_unique), nrow = nrow(data)))
colnames(results) <- sample_names_unique
i <- 0
for (i in 1:length(sample_names_unique)){
name <- sample_names_unique[i]
tmp <- (data[name] >= unique)
results[i] <- tmp
cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
}
results_2gether_unique[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
results$FASTA <- data$Fasta.headers
results_unique[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 2
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
legend_breaks = c(0,1),
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
[1] “!NA values detected!” Reverse 1 100 Unique.peptides.E3_D1_a_01
9
Unique.peptides.E3_D1_a_02
6
Unique.peptides.E3_D1_a_03
4
Unique.peptides.E3_D1_b_01
4
Unique.peptides.E3_D1_b_02
6
Unique.peptides.E3_D1_b_03
7
Unique.peptides.E3_D1_c_01
2
Unique.peptides.E3_D1_c_02
2
Unique.peptides.E3_D1_c_03
1
Unique.peptides.E3_D1_d_01
3
Unique.peptides.E3_D1_d_02
3
Unique.peptides.E3_D1_d_03
2
Unique.peptides.E3_D1_e_01
8
Unique.peptides.E3_D1_e_02
7
Unique.peptides.E3_D1_e_03
9
Unique.peptides.E3_D1_f_01
6
Unique.peptides.E3_D1_f_02
6
Unique.peptides.E3_D1_f_03
6
if (LFQ == TRUE) {
cat("\n")
cat("##","LFQ","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
results <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = nrow(data)))
colnames(results) <- sample_names_lfq
i <- 0
for (i in 1:length(sample_names_lfq)){
name <- sample_names_lfq[i]
tmp <- log2(data[name])
tmp[tmp == -Inf] <- NA
results[i] <- tmp
cat(name,"<br/>",sum(!is.na(tmp)),"<br/>")
}
results_2gether_lfq[1,] <- sapply(results,function(x)sum(!is.na(x)))
results$FASTA <- data$Fasta.headers
results_lfq[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 50
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
# show summary
if (RAZOR == TRUE){
cat("\n")
cat("##"," Summary Razor","\n")
print(kable(results_2gether_razor))
ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Razor")
}
| Razor…unique.peptides.E3_D1_a_01 | Razor…unique.peptides.E3_D1_a_02 | Razor…unique.peptides.E3_D1_a_03 | Razor…unique.peptides.E3_D1_b_01 | Razor…unique.peptides.E3_D1_b_02 | Razor…unique.peptides.E3_D1_b_03 | Razor…unique.peptides.E3_D1_c_01 | Razor…unique.peptides.E3_D1_c_02 | Razor…unique.peptides.E3_D1_c_03 | Razor…unique.peptides.E3_D1_d_01 | Razor…unique.peptides.E3_D1_d_02 | Razor…unique.peptides.E3_D1_d_03 | Razor…unique.peptides.E3_D1_e_01 | Razor…unique.peptides.E3_D1_e_02 | Razor…unique.peptides.E3_D1_e_03 | Razor…unique.peptides.E3_D1_f_01 | Razor…unique.peptides.E3_D1_f_02 | Razor…unique.peptides.E3_D1_f_03 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12 | 9 | 7 | 7 | 10 | 11 | 4 | 6 | 5 | 4 | 7 | 6 | 11 | 11 | 12 | 10 | 10 | 10 |
if (UNIQUE == TRUE){
cat("\n")
cat("##"," Summary Unique","\n")
print(kable(results_2gether_unique))
ggplot(melt(results_2gether_unique),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Unique")
}
| Unique.peptides.E3_D1_a_01 | Unique.peptides.E3_D1_a_02 | Unique.peptides.E3_D1_a_03 | Unique.peptides.E3_D1_b_01 | Unique.peptides.E3_D1_b_02 | Unique.peptides.E3_D1_b_03 | Unique.peptides.E3_D1_c_01 | Unique.peptides.E3_D1_c_02 | Unique.peptides.E3_D1_c_03 | Unique.peptides.E3_D1_d_01 | Unique.peptides.E3_D1_d_02 | Unique.peptides.E3_D1_d_03 | Unique.peptides.E3_D1_e_01 | Unique.peptides.E3_D1_e_02 | Unique.peptides.E3_D1_e_03 | Unique.peptides.E3_D1_f_01 | Unique.peptides.E3_D1_f_02 | Unique.peptides.E3_D1_f_03 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 9 | 6 | 4 | 4 | 6 | 7 | 2 | 2 | 1 | 3 | 3 | 2 | 8 | 7 | 9 | 6 | 6 | 6 |
if (LFQ == TRUE){
cat("\n")
cat("##"," Summary LFQ","\n")
print(kable(results_2gether_lfq))
ggplot(melt(results_2gether_lfq),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "LFQ")
}
# save results
results_final <- data.frame(results_2gether_razor,results_2gether_unique,results_2gether_lfq)
# get rid of NA columns
results_final <- results_final[,colSums(is.na(results_final))<nrow(results_final)]
write.csv2(results_final, file = paste(params$pathRDS,"_summary.csv",sep=""))
saveRDS(results_razor, file = paste(params$pathRDS,"_Razor.RDS",sep=""))
saveRDS(results_unique, file = paste(params$pathRDS,"_Unique.RDS",sep=""))
saveRDS(results_lfq, file = paste(params$pathRDS,"_LFQ.RDS",sep=""))
p <- ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Razor")
ggplotly(p)
dat <- results_razor[[1]]
yeasx <- grep("YEASX",dat$FASTA)
wheat <- grep("WHEAT",dat$FASTA)
# write human oder trica in a column
for (i in 1:nrow(dat)){
if (grepl("YEASX",dat$FASTA[i])){
dat$org[i] <- "YEASX"
}
else if (grepl("WHEAT",dat$FASTA[i])){
dat$org[i] <- "WHEAT"
}
else {
dat$org[i] <- NA
}
}
dat$org[intersect(yeasx,wheat)] <- "YEASX/WHEAT"
dat4plot <- dat[,-c(ncol(dat)-1)]
dat4plot_long <- melt(dat4plot, id = c("org"))
tmp <- regmatches(dat4plot_long$variable,gregexpr("(?<=des.).*",dat4plot_long$variable,perl=TRUE))
tmp <- unlist(tmp)
dat4plot_long$variable <- tmp
p <- ggplot(data=dat4plot_long, aes(x=variable, y=as.numeric(value), fill=org)) +
stat_summary(fun.y = sum, geom = "bar", position = "dodge") +
xlab("sample name") +
ylab("counts") +
theme(axis.text.x = element_text(angle = 90))
ggplotly(p)
for (i in 1:(ncol(dat)-2)){
cat("\n")
text <- paste(" Fasta Headers - Sample",sample_names_raw[i])
cat("##",text,"\n")
cat("\"n")
print(dat$fasta[which(dat[,i])])
}
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
## [1] "Time used for analysis: 0.45 minutes"